Morphological variation of amazon and sailfin mollies

In this study, I am looking at various populations of amazon and sailfin mollies across their native/introduced range to assess morphological variation both within and among the species. I am using the Pickle fish collections for my samples.

Data collection

## Using libcurl 7.64.1 with Schannel

Correcting for body size (residuals)

Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.

Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales below lateral line and fluctuating asymmetry; all other traits influenced by body size.

Regressions

  • STEP ONE: plot trait vs body size
library(ggplot2)
library(ggpubr)


##### LAT #####
reg.lat.D <- lm(lat$D ~ lat$SL)
sd.lat.D <- rstandard(reg.lat.D)
reg.lat.D.plot <- ggplot(lat, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.D.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.P1 <- lm(lat$P1 ~ lat$SL)
sd.lat.P1 <- rstandard(reg.lat.P1)
reg.lat.P1.plot <- ggplot(lat, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.P2.L <- lm(lat$P2.L ~ lat$SL)
sd.lat.P2.L <- rstandard(reg.lat.P2.L)
reg.lat.P2.L.plot <- ggplot(lat, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P2.L.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.P2.R <- lm(lat$P2.R ~ lat$SL)
sd.lat.P2.R <- rstandard(reg.lat.P2.R)
reg.lat.P2.R.plot <- ggplot(lat, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P2.R.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.A <- lm(lat$A ~ lat$SL)
sd.lat.A <- rstandard(reg.lat.A)
reg.lat.A.plot <- ggplot(lat, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.A.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.P1.R <- lm(lat$P1.R ~ lat$SL)
sd.lat.P1.R <- rstandard(reg.lat.P1.R)
reg.lat.P1.R.plot <- ggplot(lat, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.R.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.LLSC <- lm(lat$LLSC ~ lat$SL)
sd.lat.LLSC <- rstandard(reg.lat.LLSC)
reg.lat.LLSC.plot <- ggplot(lat, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.LLSC.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.SALL <- lm(lat$SALL ~ lat$SL)
sd.lat.SALL <- rstandard(reg.lat.SALL)
reg.lat.SALL.plot <- ggplot(lat, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SALL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.SBLL <- lm(lat$SBLL ~ lat$SL)
sd.lat.SBLL <- rstandard(reg.lat.SBLL)
reg.lat.SBLL.plot <- ggplot(lat, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBLL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.SBDF <- lm(lat$SBDF ~ lat$SL)
sd.lat.SBDF <- rstandard(reg.lat.SBDF)
reg.lat.SBDF.plot <- ggplot(lat, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBDF.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.BD <- lm(lat$BD ~ lat$SL)
sd.lat.BD <- rstandard(reg.lat.BD)
reg.lat.BD.plot <- ggplot(lat, aes(x = SL, y = BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.BD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.CPD <- lm(lat$CPD ~ lat$SL)
sd.lat.CPD <- rstandard(reg.lat.CPD)
reg.lat.CPD.plot <- ggplot(lat, aes(x = SL, y = CPD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.CPL <- lm(lat$CPL ~ lat$SL)
sd.lat.CPL <- rstandard(reg.lat.CPL)
reg.lat.CPL.plot <- ggplot(lat, aes(x = SL, y = CPL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.PreDL <- lm(lat$PreDL ~ lat$SL)
sd.lat.PreDL <- rstandard(reg.lat.PreDL)
reg.lat.PreDL.plot <- ggplot(lat, aes(x = SL, y = PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.PreDL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.DbL <- lm(lat$DbL ~ lat$SL)
sd.lat.DbL <- rstandard(reg.lat.DbL)
reg.lat.DbL.plot <- ggplot(lat, aes(x = SL, y = DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.DbL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.HL <- lm(lat$HL ~ lat$SL)
sd.lat.HL <- rstandard(reg.lat.HL)
reg.lat.HL.plot <- ggplot(lat, aes(x = SL, y = HL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.HD <- lm(lat$HD ~ lat$SL)
sd.lat.HD <- rstandard(reg.lat.HD)
reg.lat.HD.plot <- ggplot(lat, aes(x = SL, y = HD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.HW <- lm(lat$HW ~ lat$SL)
sd.lat.HW <- rstandard(reg.lat.HW)
reg.lat.HW.plot <- ggplot(lat, aes(x = SL, y = HW)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HW.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.SnL <- lm(lat$SnL ~ lat$SL)
sd.lat.SnL <- rstandard(reg.lat.SnL)
reg.lat.SnL.plot <- ggplot(lat, aes(x = SL, y = SnL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SnL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.OL <- lm(lat$OL ~ lat$SL)
sd.lat.OL <- rstandard(reg.lat.OL)
reg.lat.OL.plot <- ggplot(lat, aes(x = SL, y = OL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.OL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.lat.FLA <- lm(lat$FLA ~ lat$SL)
sd.lat.FLA <- rstandard(reg.lat.FLA)
reg.lat.FLA.plot <- ggplot(lat, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.FLA.plot
## `geom_smooth()` using formula 'y ~ x'

##### FORM #####

reg.form.D <- lm(form$D ~ form$SL)
sd.form.D <- rstandard(reg.form.D)
reg.form.D.plot <- ggplot(form, aes(x =SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.D.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.P1 <- lm(form$P1 ~ form$SL)
sd.form.P1 <- rstandard(reg.form.P1)
reg.form.P1.plot <- ggplot(form, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.P2.L <- lm(form$P2.L ~ form$SL)
sd.form.P2.L <- rstandard(reg.form.P2.L)
reg.form.P2.L.plot <- ggplot(form, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P2.L.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.P2.R <- lm(form$P2.R ~ form$SL)
sd.form.P2.R <- rstandard(reg.form.P2.R)
reg.form.P2.R.plot <- ggplot(form, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P2.R.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.A <- lm(form$A ~ form$SL)
sd.form.A <- rstandard(reg.form.A)
reg.form.A.plot <- ggplot(form, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.A.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.P1.R <- lm(form$P1.R ~ form$SL)
sd.form.P1.R <- rstandard(reg.form.P1.R)
reg.form.P1.R.plot <- ggplot(form, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.R.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.LLSC <- lm(form$LLSC ~ form$SL)
sd.form.LLSC <- rstandard(reg.form.LLSC)
reg.form.LLSC.plot <- ggplot(form, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.LLSC.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.SALL <- lm(form$SALL ~ form$SL)
sd.form.SALL <- rstandard(reg.form.SALL)
reg.form.SALL.plot <- ggplot(form, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SALL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.SBLL <- lm(form$SBLL ~ form$SL)
sd.form.SBLL <- rstandard(reg.form.SBLL)
reg.form.SBLL.plot <- ggplot(form, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBLL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.SBDF <- lm(form$SBDF ~ form$SL)
sd.form.SBDF <- rstandard(reg.form.SBDF)
reg.form.SBDF.plot <- ggplot(form, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBDF.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.BD <- lm(form$BD ~ form$SL)
sd.form.BD <- rstandard(reg.form.BD)
reg.form.BD.plot <- ggplot(form, aes(x = SL, y = BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.BD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.CPD <- lm(form$CPD ~ form$SL)
sd.form.CPD <- rstandard(reg.form.CPD)
reg.form.CPD.plot <- ggplot(form, aes(x = SL, y = CPD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.CPL <- lm(form$CPL ~ form$SL)
sd.form.CPL <- rstandard(reg.form.CPL)
reg.form.CPL.plot <- ggplot(form, aes(x = SL, y = CPL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.PreDL <- lm(form$PreDL ~ form$SL)
sd.form.PreDL <- rstandard(reg.form.PreDL)
reg.form.PreDL.plot <- ggplot(form, aes(x = SL, y = PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.PreDL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.DbL <- lm(form$DbL ~ form$SL)
sd.form.DbL <- rstandard(reg.form.DbL)
reg.form.DbL.plot <- ggplot(form, aes(x = SL, y = DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.DbL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.HL <- lm(form$HL ~ form$SL)
sd.form.HL <- rstandard(reg.form.HL)
reg.form.HL.plot <- ggplot(form, aes(x = SL, y = HL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.HD <- lm(form$HD ~ form$SL)
sd.form.HD <- rstandard(reg.form.HD)
reg.form.HD.plot <- ggplot(form, aes(x = SL, y = HD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.HW <- lm(form$HW ~ form$SL)
sd.form.HW <- rstandard(reg.form.HW)
reg.form.HW.plot <- ggplot(form, aes(x = SL, y = HW)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HW.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.SnL <- lm(form$SnL ~ form$SL)
sd.form.SnL <- rstandard(reg.form.SnL)
reg.form.SnL.plot <- ggplot(form, aes(x = SL, y = SnL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SnL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.OL <- lm(form$OL ~ form$SL)
sd.form.OL <- rstandard(reg.form.OL)
reg.form.OL.plot <- ggplot(form, aes(x = SL, y = OL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.OL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.form.FLA <- lm(form$FLA ~ form$SL)
sd.form.FLA <- rstandard(reg.form.FLA)
reg.form.FLA.plot <- ggplot(form, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.FLA.plot
## `geom_smooth()` using formula 'y ~ x'

##### MEX #####
reg.mex.D <- lm(mex$D ~ mex$SL)
sd.mex.D <- rstandard(reg.mex.D)
reg.mex.D.plot <- ggplot(mex, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.D.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.P1 <- lm(mex$P1 ~ mex$SL)
sd.mex.P1 <- rstandard(reg.mex.P1)
reg.mex.P1.plot <- ggplot(mex, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.P2.L <- lm(mex$P2.L ~ mex$SL)
sd.mex.P2.L <- rstandard(reg.mex.P2.L)
reg.mex.P2.L.plot <- ggplot(mex, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P2.L.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.P2.R <- lm(mex$P2.R ~ mex$SL)
sd.mex.P2.R <- rstandard(reg.mex.P2.R)
reg.mex.P2.R.plot <- ggplot(mex, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P2.R.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.A <- lm(mex$A ~ mex$SL)
sd.mex.A <- rstandard(reg.mex.A)
reg.mex.A.plot <- ggplot(mex, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.A.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.P1.R <- lm(mex$P1.R ~ mex$SL)
sd.mex.P1.R <- rstandard(reg.mex.P1.R)
reg.mex.P1.R.plot <- ggplot(mex, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.R.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.LLSC <- lm(mex$LLSC ~ mex$SL)
sd.mex.LLSC <- rstandard(reg.mex.LLSC)
reg.mex.LLSC.plot <- ggplot(mex, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.LLSC.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.SALL <- lm(mex$SALL ~ mex$SL)
sd.mex.SALL <- rstandard(reg.mex.SALL)
reg.mex.SALL.plot <- ggplot(mex, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SALL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.SBLL <- lm(mex$SBLL ~ mex$SL)
sd.mex.SBLL <- rstandard(reg.mex.SBLL)
reg.mex.SBLL.plot <- ggplot(mex, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBLL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.SBDF <- lm(mex$SBDF ~ mex$SL)
sd.mex.SBDF <- rstandard(reg.mex.SBDF)
reg.mex.SBDF.plot <- ggplot(mex, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBDF.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.BD <- lm(mex$BD ~ mex$SL)
sd.mex.BD <- rstandard(reg.mex.BD)
reg.mex.BD.plot <- ggplot(mex, aes(x = SL, y = BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.BD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.CPD <- lm(mex$CPD ~ mex$SL)
sd.mex.CPD <- rstandard(reg.mex.CPD)
reg.mex.CPD.plot <- ggplot(mex, aes(x = SL, y = CPD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.CPL <- lm(mex$CPL ~ mex$SL)
sd.mex.CPL <- rstandard(reg.mex.CPL)
reg.mex.CPL.plot <- ggplot(mex, aes(x = SL, y = CPL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.PreDL <- lm(mex$PreDL ~ mex$SL)
sd.mex.PreDL <- rstandard(reg.mex.PreDL)
reg.mex.PreDL.plot <- ggplot(mex, aes(x = SL, y = PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.PreDL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.DbL <- lm(mex$DbL ~ mex$SL)
sd.mex.DbL <- rstandard(reg.mex.DbL)
reg.mex.DbL.plot <- ggplot(mex, aes(x = SL, y = DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.DbL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.HL <- lm(mex$HL ~ mex$SL)
sd.mex.HL <- rstandard(reg.mex.HL)
reg.mex.HL.plot <- ggplot(mex, aes(x = SL, y = HL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.HD <- lm(mex$HD ~ mex$SL)
sd.mex.HD <- rstandard(reg.mex.HD)
reg.mex.HD.plot <- ggplot(mex, aes(x = SL, y = HD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HD.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.HW <- lm(mex$HW ~ mex$SL)
sd.mex.HW <- rstandard(reg.mex.HW)
reg.mex.HW.plot <- ggplot(mex, aes(x = SL, y = HW)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HW.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.SnL <- lm(mex$SnL ~ mex$SL)
sd.mex.SnL <- rstandard(reg.mex.SnL)
reg.mex.SnL.plot <- ggplot(mex, aes(x = SL, y = SnL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SnL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.OL <- lm(mex$OL ~ mex$SL)
sd.mex.OL <- rstandard(reg.mex.OL)
reg.mex.OL.plot <- ggplot(mex, aes(x = SL, y = OL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.OL.plot
## `geom_smooth()` using formula 'y ~ x'

reg.mex.FLA <- lm(mex$FLA ~ mex$SL)
sd.mex.FLA <- rstandard(reg.mex.FLA)
reg.mex.FLA.plot <- ggplot(mex, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.FLA.plot
## `geom_smooth()` using formula 'y ~ x'

Residuals

  • STEP TWO: get residuals for each individual for traits that were influenced by body size

  • STEP THREE: convert residuals to absolute value

##### LAT #####

abs.lat.D <- abs(res.lat.D)
mean(abs.lat.D)
## [1] 0.5375505
abs.lat.P1 <- abs(res.lat.P1)
mean(abs.lat.P1)
## [1] 0.5466667
abs.lat.P1.R <- abs(res.lat.P1.R)
mean(abs.lat.P1.R)
## [1] 0.5799393
abs.lat.LLSC <- abs(res.lat.LLSC)
mean(abs.lat.LLSC)
## [1] 0.7664044
abs.lat.SALL <- abs(res.lat.SALL)
mean(abs.lat.SALL)
## [1] 0.3174361
abs.lat.SBLL <- abs(res.lat.SBLL)
mean(abs.lat.SBLL)
## [1] 0.2461336
abs.lat.BD <- abs(res.lat.BD)
mean(abs.lat.BD)
## [1] 0.7662013
abs.lat.CPD <- abs(res.lat.CPD)
mean(abs.lat.CPD)
## [1] 0.3693678
abs.lat.CPL <- abs(res.lat.CPL)
mean(abs.lat.CPL)
## [1] 0.463331
abs.lat.PreDL <- abs(res.lat.PreDL)
mean(abs.lat.PreDL)
## [1] 0.5692506
abs.lat.DbL <- abs(res.lat.DbL)
mean(abs.lat.DbL)
## [1] 0.6943292
abs.lat.HL <- abs(res.lat.HL)
mean(abs.lat.HL)
## [1] 0.5195023
abs.lat.HD <- abs(res.lat.HD)
mean(abs.lat.HD)
## [1] 0.3736227
abs.lat.HW <- abs(res.lat.HW)
mean(abs.lat.HW)
## [1] 0.3532098
abs.lat.SnL <- abs(res.lat.SnL)
mean(abs.lat.SnL)
## [1] 0.3559954
abs.lat.OL <- abs(res.lat.OL)
mean(abs.lat.OL)
## [1] 0.242467
##### FORM #####

abs.form.D <- abs(res.form.D)
mean(abs.form.D)
## [1] 0.5668177
abs.form.P1 <- abs(res.form.P1)
mean(abs.form.P1)
## [1] 0.4843616
abs.form.P1.R <- abs(res.form.P1.R)
mean(abs.form.P1.R)
## [1] 0.4242033
abs.form.LLSC <- abs(res.form.LLSC)
mean(abs.form.LLSC)
## [1] 0.9038801
abs.form.SALL <- abs(res.form.SALL)
mean(abs.form.SALL)
## [1] 0.3601306
abs.form.SBLL <- abs(res.form.SBLL)
mean(abs.form.SBLL)
## [1] 0.3399272
abs.form.BD <- abs(res.form.BD)
mean(abs.form.BD)
## [1] 0.6992201
abs.form.CPD <- abs(res.form.CPD)
mean(abs.form.CPD)
## [1] 0.3242864
abs.form.CPL <- abs(res.form.CPL)
mean(abs.form.CPL)
## [1] 0.4841018
abs.form.PreDL <- abs(res.form.PreDL)
mean(abs.form.PreDL)
## [1] 0.5943769
abs.form.DbL <- abs(res.form.DbL)
mean(abs.form.DbL)
## [1] 0.5507415
abs.form.HL <- abs(res.form.HL)
mean(abs.form.HL)
## [1] 0.7175548
abs.form.HD <- abs(res.form.HD)
mean(abs.form.HD)
## [1] 0.3866209
abs.form.HW <- abs(res.form.HW)
mean(abs.form.HW)
## [1] 0.3756333
abs.form.SnL <- abs(res.form.SnL)
mean(abs.form.SnL)
## [1] 0.2819469
abs.form.OL <- abs(res.form.OL)
mean(abs.form.OL)
## [1] 0.2013391
##### MEX #####

abs.mex.D <- abs(res.mex.D)
mean(abs.mex.D)
## [1] 0.1657275
abs.mex.P1 <- abs(res.mex.P1)
mean(abs.mex.P1)
## [1] 0.5686425
abs.mex.P1.R <- abs(res.mex.P1.R)
mean(abs.mex.P1.R)
## [1] 0.458723
abs.mex.LLSC <- abs(res.mex.LLSC)
mean(abs.mex.LLSC)
## [1] 0.4434954
abs.mex.SALL <- abs(res.mex.SALL)
mean(abs.mex.SALL)
## [1] 0.1248197
abs.mex.SBLL <- abs(res.mex.SBLL)
mean(abs.mex.SBLL)
## [1] 0.2713231
abs.mex.BD <- abs(res.mex.BD)
mean(abs.mex.BD)
## [1] 0.6849863
abs.mex.CPD <- abs(res.mex.CPD)
mean(abs.mex.CPD)
## [1] 0.3413966
abs.mex.CPL <- abs(res.mex.CPL)
mean(abs.mex.CPL)
## [1] 0.4422817
abs.mex.PreDL <- abs(res.mex.PreDL)
mean(abs.mex.PreDL)
## [1] 1.233006
abs.mex.DbL <- abs(res.mex.DbL)
mean(abs.mex.DbL)
## [1] 0.4028121
abs.mex.HL <- abs(res.mex.HL)
mean(abs.mex.HL)
## [1] 0.3375982
abs.mex.HD <- abs(res.mex.HD)
mean(abs.mex.HD)
## [1] 0.3266186
abs.mex.HW <- abs(res.mex.HW)
mean(abs.mex.HW)
## [1] 0.2523109
abs.mex.SnL <- abs(res.mex.SnL)
mean(abs.mex.SnL)
## [1] 0.263338
abs.mex.OL <- abs(res.mex.OL)
mean(abs.mex.OL)
## [1] 0.1157007
#let's get this into the raw1 data set so that we can plot this more easily

abs.res.D <- c(abs.lat.D, abs.form.D, abs.mex.D)
abs.res.P1 <- c(abs.lat.P1, abs.form.P1, abs.mex.P1)
abs.res.P1.R <- c(abs.lat.P1.R, abs.form.P1.R, abs.mex.P1.R)
abs.res.LLSC<- c(abs.lat.LLSC, abs.form.LLSC, abs.mex.LLSC)
abs.res.SALL<- c(abs.lat.SALL, abs.form.SALL, abs.mex.SALL)
abs.res.SBLL<- c(abs.lat.SBLL, abs.form.SBLL, abs.mex.SBLL)
abs.res.BD<- c(abs.lat.BD, abs.form.BD, abs.mex.BD)
abs.res.CPD<- c(abs.lat.CPD, abs.form.CPD, abs.mex.CPD)
abs.res.CPL<- c(abs.lat.CPL, abs.form.CPL, abs.mex.CPL)
abs.res.PreDL <- c(abs.lat.PreDL, abs.form.PreDL, abs.mex.PreDL)
abs.res.DbL <- c(abs.lat.DbL, abs.form.DbL, abs.mex.DbL)
abs.res.HL<- c(abs.lat.HL, abs.form.HL, abs.mex.HL)
abs.res.HD<- c(abs.lat.HD, abs.form.HD, abs.mex.HD)
abs.res.HW <- c(abs.lat.HW, abs.form.HW, abs.mex.HW)
abs.res.SnL <- c(abs.lat.SnL, abs.form.SnL, abs.mex.SnL)
abs.res.OL <- c(abs.lat.OL, abs.form.OL, abs.mex.OL)


raw2 <- cbind(raw1, abs.res.D, abs.res.P1, abs.res.P1.R, abs.res.LLSC, abs.res.SALL, abs.res.SBLL, abs.res.BD, abs.res.CPD, abs.res.CPL, abs.res.PreDL, abs.res.DbL, abs.res.HL, abs.res.HD, abs.res.HW, abs.res.SnL, abs.res.OL)

Residual Comparisons

  • STEP FOUR: plot the mean of the |residuals| for both species, for all traits influenced by body size
library(ggbeeswarm)
## Warning: package 'ggbeeswarm' was built under R version 4.1.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
ggplot(raw2, aes(SPP, abs.res.D)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

scatter_violin <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(
    fun.data = "mean_sdl",  fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
    )

print(scatter_violin)

scatter_violin1 <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)

print(scatter_violin1)

ggplot(raw2, aes(SPP, abs.res.P1)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.P1.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(raw2, aes(SPP, abs.res.LLSC)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SALL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SBLL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.BD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.CPD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.CPL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(raw2, aes(SPP, abs.res.PreDL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.DbL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HW)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SnL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.OL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Comparing variation (F & T-tests)

F tests on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F tests on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F test of variance on the raw data.

Quick results summary: For the F-tests on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are right pectoral (lat>form), scales below lateral line (form>lat), dorsal base length (lat>form), head length (form>lat), and ocular length (lat>form).

-   will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.

F tests of variation

This is for traits NOT influenced by body size.

## 
##  F test to compare two variances
## 
## data:  lat$P2.L and form$P2.L
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
## 
##  F test to compare two variances
## 
## data:  lat$P2.R and form$P2.R
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
## 
##  F test to compare two variances
## 
## data:  lat$A and form$A
## F = 1.1322, num df = 133, denom df = 165, p-value = 0.4474
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8210231 1.5703926
## sample estimates:
## ratio of variances 
##           1.132245
## 
##  F test to compare two variances
## 
## data:  lat$SBDF and form$SBDF
## F = 0.76031, num df = 133, denom df = 165, p-value = 0.1003
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5513254 1.0545347
## sample estimates:
## ratio of variances 
##          0.7603142
## 
##  F test to compare two variances
## 
## data:  lat$FLA and form$FLA
## F = 0.73321, num df = 133, denom df = 165, p-value = 0.06289
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5316692 1.0169377
## sample estimates:
## ratio of variances 
##           0.733207





T tests on residuals

This will be for traits that WERE influenced by body size. The absolute value of the residuals will be used.

## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.D and abs.form.D
## t = -0.61099, df = 263.7, p-value = 0.5417
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.12358398  0.06504966
## sample estimates:
## mean of x mean of y 
## 0.5375505 0.5668177
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.P1 and abs.form.P1
## t = 1.2856, df = 291.88, p-value = 0.1996
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03307996  0.15769016
## sample estimates:
## mean of x mean of y 
## 0.5466667 0.4843616
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.P1.R and abs.form.P1.R
## t = 3.4656, df = 297.09, p-value = 0.0006071
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.06729909 0.24417289
## sample estimates:
## mean of x mean of y 
## 0.5799393 0.4242033
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.LLSC and abs.form.LLSC
## t = -1.7461, df = 297.86, p-value = 0.08182
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.29241758  0.01746627
## sample estimates:
## mean of x mean of y 
## 0.7664044 0.9038801
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SALL and abs.form.SALL
## t = -0.97143, df = 190.92, p-value = 0.3326
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.12938485  0.04399579
## sample estimates:
## mean of x mean of y 
## 0.3174361 0.3601306
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SBLL and abs.form.SBLL
## t = -2.2128, df = 289.71, p-value = 0.02769
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.17721884 -0.01036833
## sample estimates:
## mean of x mean of y 
## 0.2461336 0.3399272
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.BD and abs.form.BD
## t = 0.97705, df = 264.74, p-value = 0.3294
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0680000  0.2019624
## sample estimates:
## mean of x mean of y 
## 0.7662013 0.6992201
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPD and abs.form.CPD
## t = 1.2267, df = 253.07, p-value = 0.2211
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02729385  0.11745664
## sample estimates:
## mean of x mean of y 
## 0.3693678 0.3242864
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPL and abs.form.CPL
## t = -0.46237, df = 286.17, p-value = 0.6442
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.10919098  0.06764928
## sample estimates:
## mean of x mean of y 
## 0.4633310 0.4841018
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.PreDL and abs.form.PreDL
## t = -0.48108, df = 289.01, p-value = 0.6308
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.12792439  0.07767191
## sample estimates:
## mean of x mean of y 
## 0.5692506 0.5943769
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.DbL and abs.form.DbL
## t = 2.3093, df = 245.06, p-value = 0.02176
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02111617 0.26605913
## sample estimates:
## mean of x mean of y 
## 0.6943292 0.5507415
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HL and abs.form.HL
## t = -2.6624, df = 276.26, p-value = 0.008213
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.34449292 -0.05161215
## sample estimates:
## mean of x mean of y 
## 0.5195023 0.7175548
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HD and abs.form.HD
## t = -0.34174, df = 292.51, p-value = 0.7328
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08785618  0.06185970
## sample estimates:
## mean of x mean of y 
## 0.3736227 0.3866209
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HW and abs.form.HW
## t = -0.63, df = 294.01, p-value = 0.5292
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.09247196  0.04762502
## sample estimates:
## mean of x mean of y 
## 0.3532098 0.3756333
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SnL and abs.form.SnL
## t = 0.9727, df = 146.25, p-value = 0.3323
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07640178  0.22449877
## sample estimates:
## mean of x mean of y 
## 0.3559954 0.2819469
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.OL and abs.form.OL
## t = 1.9818, df = 292.65, p-value = 0.04844
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0002843681 0.0819713019
## sample estimates:
## mean of x mean of y 
## 0.2424670 0.2013391

Summary of variance results

average.table <- matrix(c(ttest.abs.D$p.value, mean(abs.lat.D, na.rm = TRUE), mean(abs.form.D, na.rm = TRUE), ttest.abs.P1$p.value, mean(abs.lat.P1, na.rm = TRUE), mean(abs.form.P1, na.rm = TRUE), var.rig.pel$p.value, mean(lat$P2.R, na.rm = TRUE), mean(form$P2.R, na.rm = TRUE), var.left.pel$p.value, mean(lat$P2.L, na.rm = TRUE), mean(form$P2.L, na.rm = TRUE), var.anal$p.value, mean(lat$A, na.rm = TRUE), mean(form$A, na.rm = TRUE), ttest.abs.P1.R$p.value, mean(abs.lat.P1.R, na.rm = TRUE), mean(abs.form.P1.R, na.rm = TRUE), ttest.abs.LLSC$p.value, mean(abs.lat.LLSC, na.rm = TRUE), mean(abs.form.LLSC, na.rm = TRUE), ttest.abs.SALL$p.value, mean(abs.lat.SALL, na.rm = TRUE), mean(abs.form.SALL, na.rm = TRUE), ttest.abs.SBLL$p.value, mean(abs.lat.SBLL, na.rm = TRUE), mean(abs.form.SBLL, na.rm = TRUE), var.sca.df$p.value, mean(lat$SBDF, na.rm = TRUE), mean(form$SBDF, na.rm = TRUE), ttest.abs.BD$p.value, mean(abs.lat.BD, na.rm = TRUE), mean(abs.form.BD, na.rm = TRUE), ttest.abs.CPD$p.value, mean(abs.lat.CPD, na.rm = TRUE), mean(abs.form.CPD, na.rm = TRUE), ttest.abs.CPL$p.value, mean(abs.lat.CPL, na.rm = TRUE), mean(abs.form.CPL, na.rm = TRUE), ttest.abs.PreDL$p.value, mean(abs.lat.PreDL, na.rm = TRUE), mean(abs.form.PreDL, na.rm = TRUE), ttest.abs.DbL$p.value, mean(abs.lat.DbL, na.rm = TRUE), mean(abs.form.DbL, na.rm = TRUE), ttest.abs.HL$p.value, mean(abs.lat.HL, na.rm = TRUE), mean(abs.form.HL, na.rm = TRUE), ttest.abs.HD$p.value, mean(abs.lat.HD, na.rm = TRUE), mean(abs.form.HD, na.rm = TRUE), ttest.abs.HW$p.value, mean(abs.lat.HW, na.rm = TRUE), mean(abs.form.HW, na.rm = TRUE), ttest.abs.SnL$p.value, mean(abs.lat.SnL, na.rm = TRUE), mean(abs.form.SnL, na.rm = TRUE), ttest.abs.OL$p.value, mean(abs.lat.OL, na.rm = TRUE), mean(abs.form.OL, na.rm = TRUE), var.flu.asy$p.value, mean(lat$FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)

colnames(average.table)<- c('p-value for variance', 'lat mean', 'form mean')

rownames(average.table) <- c('dorsal rays', 'L pect rays', 'R pelvic rays', 'L pelvic rays', 'Anal rays', 'R pect rays', 'lat line scales', 'scales above LL', 'scales below LL', 'scales before dor', 'body dep', 'peduncle dep', 'peduncle leng', 'pre-dorsal', 'dorsal base', 'head length', 'head depth', 'head width', 'snout leng', 'orbital leng', 'fluct. asymmetries')

average.table
##                    p-value for variance  lat mean form mean
## dorsal rays                0.5417294696 0.5375505 0.5668177
## L pect rays                0.1996120474 0.5466667 0.4843616
## R pelvic rays                       NaN 6.0000000 6.0000000
## L pelvic rays                       NaN 6.0000000 6.0000000
## Anal rays                  0.4474165527 9.6641791 9.6987952
## R pect rays                0.0006070862 0.5799393 0.4242033
## lat line scales            0.0818216807 0.7664044 0.9038801
## scales above LL            0.3325634204 0.3174361 0.3601306
## scales below LL            0.0276911181 0.2461336 0.3399272
## scales before dor          0.1002870398 8.5597015 9.7228916
## body dep                   0.3294353578 0.7662013 0.6992201
## peduncle dep               0.2210765424 0.3693678 0.3242864
## peduncle leng              0.6441656866 0.4633310 0.4841018
## pre-dorsal                 0.6308271871 0.5692506 0.5943769
## dorsal base                0.0217592359 0.6943292 0.5507415
## head length                0.0082132797 0.5195023 0.7175548
## head depth                 0.7327921907 0.3736227 0.3866209
## head width                 0.5291809737 0.3532098 0.3756333
## snout leng                 0.3323072959 0.3559954 0.2819469
## orbital leng               0.0484363720 0.2424670 0.2013391
## fluct. asymmetries         0.0628880109 1.3283582 1.4337349

Visualization of significant results

plot_multi_histogram <- function(df, feature, label_column) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2)  +
    labs(x=feature, y = "Density")
    plt + guides(fill=guide_legend(title=label_column))
}

raw3 <- raw2[raw2$SPP !="p.mexicana", ]


plot_multi_histogram(raw3, "abs.res.P1.R", "SPP")

plot_multi_histogram(raw3, "abs.res.SBLL", "SPP")

plot_multi_histogram(raw3, "abs.res.DbL", "SPP")

plot_multi_histogram(raw3, "abs.res.HL", "SPP")

plot_multi_histogram(raw3, "abs.res.OL", "SPP")

ANOVA

Will run an ANOVA on the residuals with location and species as fixed effects. This will show me if morphology depends on the species, the location, and if the location and species interact to determine morphology.

I will first run this using the zones as the location factor. Zones (1-4) represent the latitude range with equivalent sample sizes in each, since the collections were not equally representative of all latitudes, and I wanted to avoid a sampling bias when randomly selecting samples. Zone 1 corresponds to the southern most latitude range, and zone 4 corresponds to the northern most latitude range.

I will then run the same analysis using basin as the location factor. Since fish are physically isolated to the river basins they occupy, the genetic variation is also limited to that basin. Thus it is possible for fish within the same basin to be more similar due to genetic/physical constraints. (will also do with watershed just to see).

Lastly I will run ANOVAs with both zones and basins but with standardized residuals. This would allow me to compare overall variation across traits (at least those that are depended on body size) rather than just one trait at a time. Not 100% sure if this is useful (or correct to do), but thought it would be interesting.

Zones

library(ggplot2)

A.D <- aov(abs.res.D ~ SPP*QUARTILE, data=raw3)
summary(A.D)
##               Df Sum Sq Mean Sq F value  Pr(>F)    
## SPP            1   0.06  0.0635   0.408 0.52344    
## QUARTILE       3   0.45  0.1510   0.970 0.40715    
## SPP:QUARTILE   3   3.22  1.0719   6.888 0.00017 ***
## Residuals    292  45.44  0.1556                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.D, fill=SPP)) +
  geom_boxplot()

A.P1 <- aov(abs.res.P1 ~ SPP*QUARTILE, data=raw3)
summary(A.P1)
##               Df Sum Sq Mean Sq F value Pr(>F)
## SPP            1   0.29  0.2878   1.554  0.214
## QUARTILE       3   1.10  0.3663   1.977  0.117
## SPP:QUARTILE   3   0.73  0.2427   1.310  0.271
## Residuals    292  54.10  0.1853
A.P1.R <- aov(abs.res.P1.R ~ SPP*QUARTILE, data=raw3)
summary(A.P1.R)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP            1   1.80  1.7983  11.423 0.000824 ***
## QUARTILE       3   0.13  0.0434   0.276 0.842918    
## SPP:QUARTILE   3   0.06  0.0189   0.120 0.948162    
## Residuals    292  45.97  0.1574                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.P1.R, fill=SPP)) +
  geom_boxplot()

A.LLSC <- aov(abs.res.LLSC ~ SPP*QUARTILE, data=raw3)
summary(A.LLSC)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   1.40  1.4013   2.940 0.0875 .
## QUARTILE       3   1.35  0.4506   0.945 0.4190  
## SPP:QUARTILE   3   3.42  1.1399   2.391 0.0688 .
## Residuals    292 139.19  0.4767                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A.SALL <- aov(abs.res.SALL ~ SPP*QUARTILE, data=raw3)
summary(A.SALL)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP            1   0.14  0.1352   1.151    0.284    
## QUARTILE       3   3.23  1.0782   9.182 7.99e-06 ***
## SPP:QUARTILE   3   0.25  0.0845   0.720    0.541    
## Residuals    292  34.29  0.1174                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.SALL, fill=SPP)) +
  geom_boxplot()

A.SBLL <- aov(abs.res.SBLL ~ SPP*QUARTILE, data=raw3)
summary(A.SBLL)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   0.65  0.6523   4.859 0.0283 *
## QUARTILE       3   0.78  0.2591   1.930 0.1248  
## SPP:QUARTILE   3   0.12  0.0390   0.291 0.8320  
## Residuals    292  39.20  0.1342                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.SBLL, fill=SPP)) +
  geom_boxplot()

A.BD <- aov(abs.res.BD ~ SPP*QUARTILE, data=raw3)
summary(A.BD)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   0.33  0.3327   1.006 0.3166  
## QUARTILE       3   1.11  0.3694   1.117 0.3422  
## SPP:QUARTILE   3   3.11  1.0364   3.136 0.0259 *
## Residuals    292  96.51  0.3305                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.BD, fill=SPP)) +
  geom_boxplot()

A.CPD <- aov(abs.res.CPD ~ SPP*QUARTILE, data=raw3)
summary(A.CPD)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1  0.151  0.1507   1.606 0.2061  
## QUARTILE       3  1.020  0.3399   3.622 0.0136 *
## SPP:QUARTILE   3  0.093  0.0311   0.332 0.8025  
## Residuals    292 27.405  0.0939                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.CPD, fill=SPP)) +
  geom_boxplot()

A.CPL <- aov(abs.res.CPL ~ SPP*QUARTILE, data=raw3)
summary(A.CPL)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   0.03  0.0320   0.217 0.6418  
## QUARTILE       3   1.51  0.5018   3.402 0.0182 *
## SPP:QUARTILE   3   0.13  0.0442   0.300 0.8257  
## Residuals    292  43.07  0.1475                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.CPL, fill=SPP)) +
  geom_boxplot()

A.PreDL <- aov(abs.res.PreDL ~ SPP*QUARTILE, data=raw3)
summary(A.PreDL)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   0.05  0.0468   0.235 0.6282  
## QUARTILE       3   0.41  0.1357   0.681 0.5640  
## SPP:QUARTILE   3   2.22  0.7416   3.724 0.0118 *
## Residuals    292  58.15  0.1991                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.PreDL, fill=SPP)) +
  geom_boxplot()

A.DbL <- aov(abs.res.DbL ~ SPP*QUARTILE, data=raw3)
summary(A.DbL)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1   1.53  1.5287   5.617 0.0184 *
## QUARTILE       3   0.85  0.2838   1.043 0.3740  
## SPP:QUARTILE   3   0.51  0.1698   0.624 0.6002  
## Residuals    292  79.48  0.2722                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.DbL, fill=SPP)) +
  geom_boxplot()

A.HL <- aov(abs.res.HL ~ SPP*QUARTILE, data=raw3)
summary(A.HL)
##               Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP            1   2.91   2.908   6.779 0.00969 **
## QUARTILE       3   3.96   1.320   3.077 0.02795 * 
## SPP:QUARTILE   3   6.08   2.028   4.727 0.00309 **
## Residuals    292 125.28   0.429                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.HL, fill=SPP)) +
  geom_boxplot()

A.HD <- aov(abs.res.HD ~ SPP*QUARTILE, data=raw3)
summary(A.HD)
##               Df Sum Sq Mean Sq F value Pr(>F)
## SPP            1   0.01 0.01253   0.114  0.736
## QUARTILE       3   0.11 0.03551   0.323  0.809
## SPP:QUARTILE   3   0.27 0.09140   0.831  0.478
## Residuals    292  32.12 0.11001
A.HW <- aov(abs.res.HW ~ SPP*QUARTILE, data=raw3)
summary(A.HW)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP            1  0.037  0.0373   0.414 0.520577    
## QUARTILE       3  1.713  0.5710   6.336 0.000356 ***
## SPP:QUARTILE   3  0.562  0.1875   2.081 0.102864    
## Residuals    292 26.311  0.0901                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.HW, fill=SPP)) +
  geom_boxplot()

A.SnL <- aov(abs.res.SnL ~ SPP*QUARTILE, data=raw3)
summary(A.SnL)
##               Df Sum Sq Mean Sq F value Pr(>F)
## SPP            1   0.41  0.4066   1.144  0.286
## QUARTILE       3   0.93  0.3090   0.869  0.457
## SPP:QUARTILE   3   1.19  0.3977   1.119  0.342
## Residuals    292 103.79  0.3554
A.OL <- aov(abs.res.OL ~ SPP*QUARTILE, data=raw3)
summary(A.OL)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## SPP            1  0.125 0.12542   3.927 0.0485 *
## QUARTILE       3  0.167 0.05572   1.745 0.1579  
## SPP:QUARTILE   3  0.187 0.06248   1.956 0.1207  
## Residuals    292  9.326 0.03194                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(QUARTILE), y=abs.res.OL, fill=SPP)) +
  geom_boxplot()

Basins

A1.D <- aov(abs.res.D ~ SPP*BASIN, data=raw3)
summary(A1.D)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP           1   0.06  0.0635   0.402 0.52666   
## BASIN         6   3.12  0.5205   3.293 0.00377 **
## SPP:BASIN     3   0.31  0.1028   0.650 0.58344   
## Residuals   289  45.68  0.1581                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.D, fill=SPP)) +
  geom_boxplot()

A1.P1 <- aov(abs.res.P1 ~ SPP*BASIN, data=raw3)
summary(A1.P1)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.29  0.2878   1.528  0.217
## BASIN         6   0.97  0.1613   0.856  0.528
## SPP:BASIN     3   0.53  0.1751   0.930  0.427
## Residuals   289  54.43  0.1883
A1.P1.R <- aov(abs.res.P1.R ~ SPP*BASIN, data=raw3)
summary(A1.P1.R)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP           1   1.80  1.7983  11.421 0.000826 ***
## BASIN         6   0.51  0.0845   0.537 0.780341    
## SPP:BASIN     3   0.15  0.0485   0.308 0.819718    
## Residuals   289  45.50  0.1575                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.P1.R, fill=SPP)) +
  geom_boxplot()

A1.LLSC <- aov(abs.res.LLSC ~ SPP*BASIN, data=raw3)
summary(A1.LLSC)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   1.40  1.4013   2.999 0.0844 .
## BASIN         6   6.83  1.1376   2.435 0.0260 *
## SPP:BASIN     3   2.11  0.7026   1.504 0.2137  
## Residuals   289 135.02  0.4672                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.LLSC, fill=SPP)) +
  geom_boxplot()

A1.SALL <- aov(abs.res.SALL ~ SPP*BASIN, data=raw3)
summary(A1.SALL)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP           1   0.14  0.1352   1.156    0.283    
## BASIN         6   3.54  0.5905   5.051 6.03e-05 ***
## SPP:BASIN     3   0.45  0.1496   1.280    0.281    
## Residuals   289  33.79  0.1169                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.SALL, fill=SPP)) +
  geom_boxplot()

A1.SBLL <- aov(abs.res.SBLL ~ SPP*BASIN, data=raw3)
summary(A1.SBLL)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   0.65  0.6523   4.895 0.0277 *
## BASIN         6   1.07  0.1785   1.340 0.2393  
## SPP:BASIN     3   0.51  0.1716   1.288 0.2788  
## Residuals   289  38.51  0.1332                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.SBLL, fill=SPP)) +
  geom_boxplot()

A1.BD <- aov(abs.res.BD ~ SPP*BASIN, data=raw3)
summary(A1.BD)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP           1   0.33  0.3327   1.050 0.30643   
## BASIN         6   6.94  1.1571   3.651 0.00165 **
## SPP:BASIN     3   2.20  0.7349   2.319 0.07561 . 
## Residuals   289  91.58  0.3169                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.BD, fill=SPP)) +
  geom_boxplot()

A1.CPD <- aov(abs.res.CPD ~ SPP*BASIN, data=raw3)
summary(A1.CPD)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP           1  0.151 0.15069   1.626 0.20327   
## BASIN         6  1.601 0.26682   2.879 0.00968 **
## SPP:BASIN     3  0.135 0.04508   0.486 0.69198   
## Residuals   289 26.782 0.09267                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.CPD, fill=SPP)) +
  geom_boxplot()

A1.CPL <- aov(abs.res.CPL ~ SPP*BASIN, data=raw3)
summary(A1.CPL)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.03 0.03199   0.213  0.645
## BASIN         6   1.00 0.16720   1.114  0.354
## SPP:BASIN     3   0.34 0.11217   0.748  0.525
## Residuals   289  43.36 0.15005
A1.PreDL <- aov(abs.res.PreDL ~ SPP*BASIN, data=raw3)
summary(A1.PreDL)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   0.05  0.0468   0.237 0.6270  
## BASIN         6   2.04  0.3406   1.722 0.1155  
## SPP:BASIN     3   1.58  0.5261   2.660 0.0484 *
## Residuals   289  57.16  0.1978                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.PreDL, fill=SPP)) +
  geom_boxplot()

A1.DbL <- aov(abs.res.DbL ~ SPP*BASIN, data=raw3)
summary(A1.DbL)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   1.53  1.5287   5.614 0.0185 *
## BASIN         6   1.69  0.2823   1.037 0.4015  
## SPP:BASIN     3   0.44  0.1482   0.544 0.6524  
## Residuals   289  78.70  0.2723                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.DbL, fill=SPP)) +
  geom_boxplot()

A1.HL <- aov(abs.res.HL ~ SPP*BASIN, data=raw3)
summary(A1.HL)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1   2.91  2.9084   6.563 0.0109 *
## BASIN         6   3.83  0.6377   1.439 0.1995  
## SPP:BASIN     3   3.42  1.1414   2.576 0.0541 .
## Residuals   289 128.07  0.4431                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HL, fill=SPP)) +
  geom_boxplot()

A1.HD <- aov(abs.res.HD ~ SPP*BASIN, data=raw3)
summary(A1.HD)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1  0.013  0.0125   0.118 0.7311  
## BASIN         6  0.845  0.1408   1.330 0.2437  
## SPP:BASIN     3  1.054  0.3514   3.318 0.0203 *
## Residuals   289 30.605  0.1059                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HD, fill=SPP)) +
  geom_boxplot()

A1.HW <- aov(abs.res.HW ~ SPP*BASIN, data=raw3)
summary(A1.HW)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP           1  0.037 0.03728   0.403 0.52622   
## BASIN         6  1.676 0.27941   3.018 0.00708 **
## SPP:BASIN     3  0.151 0.05048   0.545 0.65172   
## Residuals   289 26.759 0.09259                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.HW, fill=SPP)) +
  geom_boxplot()

A1.SnL <- aov(abs.res.SnL ~ SPP*BASIN, data=raw3)
summary(A1.SnL)
##              Df Sum Sq Mean Sq F value Pr(>F)
## SPP           1   0.41  0.4066   1.139  0.287
## BASIN         6   1.63  0.2711   0.759  0.602
## SPP:BASIN     3   1.10  0.3677   1.030  0.380
## Residuals   289 103.18  0.3570
A1.OL <- aov(abs.res.OL ~ SPP*BASIN, data=raw3)
summary(A1.OL)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## SPP           1  0.125 0.12542   3.995 0.0466 *
## BASIN         6  0.315 0.05257   1.674 0.1271  
## SPP:BASIN     3  0.292 0.09728   3.099 0.0272 *
## Residuals   289  9.073 0.03139                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(BASIN), y=abs.res.OL, fill=SPP)) +
  geom_boxplot()

Watersheds

A2.D <- aov(abs.res.D ~ SPP*WATERSHED, data=raw3)
summary(A2.D)
##                Df Sum Sq Mean Sq F value Pr(>F)  
## SPP             1   0.06  0.0635   0.415 0.5198  
## WATERSHED      13   4.33  0.3331   2.178 0.0106 *
## SPP:WATERSHED   5   1.96  0.3922   2.564 0.0274 *
## Residuals     280  42.82  0.1529                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.D, fill=SPP)) +
  geom_boxplot()

A2.P1 <- aov(abs.res.P1 ~ SPP*WATERSHED, data=raw3)
summary(A2.P1)
##                Df Sum Sq Mean Sq F value Pr(>F)  
## SPP             1   0.29  0.2878   1.568 0.2116  
## WATERSHED      13   3.73  0.2868   1.562 0.0957 .
## SPP:WATERSHED   5   0.78  0.1569   0.855 0.5120  
## Residuals     280  51.41  0.1836                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.P1.R <- aov(abs.res.P1.R ~ SPP*WATERSHED, data=raw3)
summary(A2.P1.R)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1   1.80  1.7983  11.145 0.000957 ***
## WATERSHED      13   0.82  0.0629   0.390 0.972416    
## SPP:WATERSHED   5   0.16  0.0322   0.199 0.962521    
## Residuals     280  45.18  0.1614                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.P1.R, fill=SPP)) +
  geom_boxplot()

A2.LLSC <- aov(abs.res.LLSC ~ SPP*WATERSHED, data=raw3)
summary(A2.LLSC)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1   1.40  1.4013   3.299   0.0704 .  
## WATERSHED      13  20.28  1.5596   3.672 2.15e-05 ***
## SPP:WATERSHED   5   4.75  0.9502   2.237   0.0509 .  
## Residuals     280 118.93  0.4248                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.LLSC, fill=SPP)) +
  geom_boxplot()

A2.SALL <- aov(abs.res.SALL ~ SPP*WATERSHED, data=raw3)
summary(A2.SALL)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1  0.135  0.1352   1.233  0.26772    
## WATERSHED      13  5.314  0.4088   3.730 1.67e-05 ***
## SPP:WATERSHED   5  1.779  0.3557   3.246  0.00725 ** 
## Residuals     280 30.685  0.1096                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.SALL, fill=SPP)) +
  geom_boxplot()

A2.SBLL <- aov(abs.res.SBLL ~ SPP*WATERSHED, data=raw3)
summary(A2.SBLL)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1   0.65  0.6523   5.611   0.0185 *  
## WATERSHED      13   2.98  0.2289   1.969   0.0233 *  
## SPP:WATERSHED   5   4.57  0.9138   7.861 6.06e-07 ***
## Residuals     280  32.55  0.1162                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.SBLL, fill=SPP)) +
  geom_boxplot()

A2.BD <- aov(abs.res.BD ~ SPP*WATERSHED, data=raw3)
summary(A2.BD)
##                Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP             1   0.33  0.3327   1.076 0.30056   
## WATERSHED      13  10.34  0.7951   2.571 0.00225 **
## SPP:WATERSHED   5   3.80  0.7607   2.460 0.03344 * 
## Residuals     280  86.59  0.3093                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.BD, fill=SPP)) +
  geom_boxplot()

A2.CPD <- aov(abs.res.CPD ~ SPP*WATERSHED, data=raw3)
summary(A2.CPD)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1  0.151 0.15069   1.809  0.17966    
## WATERSHED      13  3.765 0.28965   3.478 4.98e-05 ***
## SPP:WATERSHED   5  1.435 0.28691   3.445  0.00488 ** 
## Residuals     280 23.318 0.08328                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.CPD, fill=SPP)) +
  geom_boxplot()

A2.CPL <- aov(abs.res.CPL ~ SPP*WATERSHED, data=raw3)
summary(A2.CPL)
##                Df Sum Sq Mean Sq F value Pr(>F)
## SPP             1   0.03 0.03199   0.218  0.641
## WATERSHED      13   2.52 0.19397   1.319  0.201
## SPP:WATERSHED   5   1.01 0.20254   1.377  0.233
## Residuals     280  41.17 0.14703
A2.PreDL <- aov(abs.res.PreDL ~ SPP*WATERSHED, data=raw3)
summary(A2.PreDL)
##                Df Sum Sq Mean Sq F value Pr(>F)  
## SPP             1   0.05  0.0468   0.236 0.6272  
## WATERSHED      13   4.34  0.3336   1.685 0.0636 .
## SPP:WATERSHED   5   1.01  0.2019   1.020 0.4060  
## Residuals     280  55.43  0.1980                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A2.DbL <- aov(abs.res.DbL ~ SPP*WATERSHED, data=raw3)
summary(A2.DbL)
##                Df Sum Sq Mean Sq F value  Pr(>F)    
## SPP             1   1.53  1.5287   6.247   0.013 *  
## WATERSHED      13   4.76  0.3665   1.498   0.117    
## SPP:WATERSHED   5   7.55  1.5107   6.173 1.9e-05 ***
## Residuals     280  68.52  0.2447                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.DbL, fill=SPP)) +
  geom_boxplot()

A2.HL <- aov(abs.res.HL ~ SPP*WATERSHED, data=raw3)
summary(A2.HL)
##                Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP             1   2.91  2.9084   6.844 0.00938 **
## WATERSHED      13  10.21  0.7856   1.849 0.03602 * 
## SPP:WATERSHED   5   6.11  1.2227   2.877 0.01497 * 
## Residuals     280 118.99  0.4250                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HL, fill=SPP)) +
  geom_boxplot()

A2.HD <- aov(abs.res.HD ~ SPP*WATERSHED, data=raw3)
summary(A2.HD)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## SPP             1  0.013  0.0125   0.125 0.724090    
## WATERSHED      13  2.141  0.1647   1.641 0.073640 .  
## SPP:WATERSHED   5  2.270  0.4541   4.526 0.000553 ***
## Residuals     280 28.093  0.1003                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HD, fill=SPP)) +
  geom_boxplot()

A2.HW <- aov(abs.res.HW ~ SPP*WATERSHED, data=raw3)
summary(A2.HW)
##                Df Sum Sq Mean Sq F value  Pr(>F)   
## SPP             1  0.037 0.03728   0.408 0.52356   
## WATERSHED      13  2.615 0.20116   2.201 0.00974 **
## SPP:WATERSHED   5  0.380 0.07593   0.831 0.52868   
## Residuals     280 25.592 0.09140                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.HW, fill=SPP)) +
  geom_boxplot()

A2.SnL <- aov(abs.res.SnL ~ SPP*WATERSHED, data=raw3)
summary(A2.SnL)
##                Df Sum Sq Mean Sq F value Pr(>F)
## SPP             1   0.41  0.4066   1.132  0.288
## WATERSHED      13   2.76  0.2124   0.592  0.860
## SPP:WATERSHED   5   2.61  0.5222   1.454  0.205
## Residuals     280 100.53  0.3591
A2.OL <- aov(abs.res.OL ~ SPP*WATERSHED, data=raw3)
summary(A2.OL)
##                Df Sum Sq Mean Sq F value Pr(>F)  
## SPP             1  0.125 0.12542   4.005 0.0463 *
## WATERSHED      13  0.670 0.05153   1.646 0.0726 .
## SPP:WATERSHED   5  0.243 0.04859   1.552 0.1740  
## Residuals     280  8.767 0.03131                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw3, aes(x=factor(WATERSHED), y=abs.res.OL, fill=SPP)) +
  geom_boxplot()

Standardized

The ANOVAs above focus on differences of particular traits as a factor of species and location. If we want to get an idea of variation in general as a factor of species and location, we can standardize the residuals (essentially unitless z-scores of residuals).

sd.res.D <- append(abs(sd.lat.D), abs(sd.form.D))
sd.res.P1 <- append(abs(sd.lat.P1), abs(sd.form.P1))
sd.res.P1.R <- append(abs(sd.lat.P1.R), abs(sd.form.P1.R))
sd.res.LLSC<- append(abs(sd.lat.LLSC), abs(sd.form.LLSC))
sd.res.SALL<- append(abs(sd.lat.SALL), abs(sd.form.SALL))
sd.res.SBLL<- append(abs(sd.lat.SBLL), abs(sd.form.SBLL))
sd.res.BD<- append(abs(sd.lat.BD), abs(sd.form.BD))
sd.res.CPD<- append(abs(sd.lat.CPD), abs(sd.form.CPD))
sd.res.CPL<- append(abs(sd.lat.CPL), abs(sd.form.CPL))
sd.res.PreDL <- append(abs(sd.lat.PreDL), abs(sd.form.PreDL))
sd.res.DbL <- append(abs(sd.lat.DbL), abs(sd.form.DbL))
sd.res.HL<- append(abs(sd.lat.HL), abs(sd.form.HL))
sd.res.HD<- append(abs(sd.lat.HD), abs(sd.form.HD))
sd.res.HW <- append(abs(sd.lat.HW), abs(sd.form.HW))
sd.res.SnL <- append(abs(sd.lat.SnL), abs(sd.form.SnL))
sd.res.OL <- append(abs(sd.lat.OL), abs(sd.form.OL))


raw4 <- cbind(raw3, sd.res.D, sd.res.P1, sd.res.P1.R, sd.res.LLSC, sd.res.SALL, sd.res.SBLL, sd.res.BD, sd.res.CPD, sd.res.CPL, sd.res.PreDL, sd.res.DbL, sd.res.HL, sd.res.HD, sd.res.HW, sd.res.SnL, sd.res.OL)

raw5 <- cbind(raw4[1:14], stack(raw4[53:68])) 
## Warning in data.frame(..., check.names = FALSE): row names were found from a
## short variable and have been discarded
lat.raw5 <- raw5[raw5$SPP == "p.latipinna",]

form.raw5 <- raw5[raw5$SPP == "p.formosa",]

######ZONES#####

A3.lat <- aov(values~QUARTILE, data=lat.raw5)
summary(A3.lat)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## QUARTILE       3    9.8   3.274     7.1 9.57e-05 ***
## Residuals   2140  986.6   0.461                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A3.form <- aov(values~QUARTILE, data=form.raw5)
summary(A3.form)
##               Df Sum Sq Mean Sq F value Pr(>F)
## QUARTILE       3    2.5  0.8491   1.992  0.113
## Residuals   2652 1130.4  0.4263
#between species

A3 <- aov(values~QUARTILE*SPP, data=raw5)
summary(A3)
##                Df Sum Sq Mean Sq F value   Pr(>F)    
## QUARTILE        3    8.1  2.7123   6.139 0.000366 ***
## SPP             1    0.3  0.2740   0.620 0.430974    
## QUARTILE:SPP    3    4.8  1.5926   3.605 0.012837 *  
## Residuals    4792 2117.1  0.4418                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw5, aes(x=factor(QUARTILE), y=values, fill=SPP)) +
  geom_boxplot()

######BASINS#####

A4.lat <- aov(values~BASIN, data=lat.raw5)
summary(A4.lat)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## BASIN          5   12.9   2.579   5.605 3.87e-05 ***
## Residuals   2138  983.6   0.460                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A4.form <- aov(values~BASIN, data=form.raw5)
summary(A4.form)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## BASIN          4   15.6   3.912   9.281 1.91e-07 ***
## Residuals   2651 1117.3   0.421                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#between species

A4 <- aov(values~BASIN*SPP, data=raw5)
summary(A4)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## BASIN          6   27.9   4.657  10.616 9.59e-12 ***
## SPP            1    0.1   0.058   0.133    0.715    
## BASIN:SPP      3    1.4   0.453   1.034    0.376    
## Residuals   4789 2100.9   0.439                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw5, aes(x=factor(BASIN), y=values, fill=SPP)) +
  geom_boxplot()

#####WATERSHEDS#####
A5.lat <- aov(values~WATERSHED, data=lat.raw5)
summary(A5.lat)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## WATERSHED     11   26.6  2.4194   5.319 2.33e-08 ***
## Residuals   2132  969.8  0.4549                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
A5.form <- aov(values~WATERSHED, data=form.raw5)
summary(A5.form)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## WATERSHED      7   25.7   3.669   8.775 1.06e-10 ***
## Residuals   2648 1107.3   0.418                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#between species

A5 <- aov(values~WATERSHED*SPP, data=raw5)
summary(A5)
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## WATERSHED       13   40.6  3.1194   7.179 4.58e-14 ***
## SPP              1    0.3  0.2814   0.648    0.421    
## WATERSHED:SPP    5   12.3  2.4570   5.654 3.33e-05 ***
## Residuals     4780 2077.1  0.4345                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(raw5, aes(x=factor(WATERSHED), y=values, fill=SPP)) +
  geom_boxplot()

PCA analysis

LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.

Variable chart:

  • D: dorsal ray count

  • P1: left pectoral ray count

  • P2.L: left pelvic rays

  • P2.R: right pelvic rays

  • A: anal rays

  • P1.R: right pectoral ray count

  • LLSC: lateral line scale count

  • SALL: scales above lateral line

  • SBLL: scales below lateral line

  • SBDF: scales before dorsal fin

  • TL: total length

  • SL: standard length

  • BD: body depth

  • CPD: caudal peduncle depth

  • CPL: caudal peduncle length

  • PreDL: pre-dorsal length

  • DbL: dorsal base length

  • HL/HW/HD: head length/width/depth

  • SnL: snout length

  • OL: ocular length

  • summary of PCs

{r, echo=FALSE}

PCA <- prcomp(raw1[, 10:31], center=TRUE, scale. = TRUE) #includes all 22 traits summary(PCA) loadings <- PCA$rotation loadings[, 1:5]

VM_PCA <- varimax(PCA$rotation) summary(VM_PCA)

  • visualization of PCA

{r, echo=FALSE}

library(AMR) library(ggplot2)

library(ggfortify)

evplot <- function(ev) { # Broken stick model (MacArthur 1957) n <- length(ev) bsm <- data.frame(j=seq(1:n), p=0) bsm\(p[1] <- 1/n for (i in 2:n) bsm\)p[i] <- bsm\(p[i-1] + (1/(n + 1 - i)) bsm\)p <- 100*bsm\(p/n # Plot eigenvalues and % of variation for each axis op <- par(mfrow=c(2,1)) barplot(ev, main="Eigenvalues", col="bisque", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n") barplot(t(cbind(100*ev/sum(ev), bsm\)p[n:1])), beside=TRUE, main=“% variation”, col=c(“bisque”,2), las=2) legend(“topright”, c(“% eigenvalue”, “Broken stick model”), pch=15, col=c(“bisque”,2), bty=“n”) par(op) }

ev <- PCA$sdev^2 evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot… not 100% confident I know what this means, but pretty sure PC1 is body size

plot6<- autoplot(PCA, data = raw1, colour=‘SPP’, loadings=TRUE, loadings.colour=‘navyblue’, loadings.label=TRUE, loadings.label.colour=‘navyblue’, loadings.label.size=5, loadings.label.vjust= 1, loadings.label.hjust= 1.2, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot6

plot6.1<- autoplot(PCA, data = raw1, colour=‘SPP’, loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot6.1

plot7<- autoplot(PCA, data = raw1, colour=‘QUARTILE’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot7

plot7A<- autoplot(PCA, data = raw1, colour=‘BASIN’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot7A

plot7B<- autoplot(PCA, data = raw1, colour=‘WATERSHED’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot7B

plot8<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘SPP’, loadings=TRUE, loadings.colour=‘navyblue’, loadings.label=TRUE, loadings.label.colour=‘navyblue’, loadings.label.size=5, loadings.label.vjust= 1, loadings.label.hjust= 1.2, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot8

plot8.1<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘SPP’, loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot8.1

plot9<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘QUARTILE’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot9

plot9A<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘BASIN’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot9A

plot9B<- autoplot(PCA, x=2, y=3, data = raw1, colour=‘WATERSHED’, shape=“SPP”, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot9B

PCA FOR RESITUDALS

not sure this is useful, but will have it here just in case

PCA2 <- prcomp(raw2[, 33:48], center=TRUE, scale. = TRUE) #includes all 22 traits summary(PCA2) loadings1 <- PCA2$rotation loadings1[, 1:5]

plot10<- autoplot(PCA2, x=1, y=2, data = raw2, colour=‘SPP’, loadings=TRUE, loadings.colour=‘navyblue’, loadings.label=TRUE, loadings.label.colour=‘navyblue’, loadings.label.size=5, loadings.label.vjust= 1, loadings.label.hjust= 1.2, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology trait Residuals”) + theme_minimal() plot10

plot11<- autoplot(PCA2, x=1, y=2, data = raw2, colour=‘SPP’, loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type=‘norm’)+ ggtitle(“PCA Plot of Morphology traits”) + theme_minimal() plot11

Variable chart: